/* for xc:
% cc -O -o cut cut.c
*/

/*
 * cut - a recreation of the Unix(Tm) cut(1) command.
 *
 * syntax:  cut -cLIST[ file1 file2 ...]
 *		cut -fLIST [-dchar][ -s][ file1 file2 ...]
 *
 *	Copyright (C) 1984 by David M. Ihnat
 *
 * This program is a total rewrite of the Bell Laboratories Unix(Tm)
 * command of the same name, as of System V.  It contains no proprietary
 * code, and therefore may be used without violation of any proprietary
 * agreements whatsoever.  However, you will notice that the program is
 * copyrighted by me.  This is to assure the program does *not* fall
 * into the public domain.  Thus, I may specify just what I am now:
 * This program may be freely copied and distributed, provided this notice
 * remains; it may not be sold for profit without express written consent of
 * the author.
 * Please note that I recreated the behavior of the Unix(Tm) 'cut' command
 * as faithfully as possible; however, I haven't run a full set of regression
 * tests.  Thus, the user of this program accepts full responsibility for any
 * effects or loss; in particular, the author is not responsible for any losses,
 * explicit or incidental, that may be incurred through use of this program.
 *
 * I ask that any bugs (and, if possible, fixes) be reported to me when
 * possible.  -David Ihnat (312) 784-4544 ihuxx!ignatz
 */

#include <stdio.h>
#include <stdlib.h>

/* Legacy declaration; nothing in the code below reads or sets errno. */
extern int errno;

/* When defined, the program name is the fixed string COMMAND, not argv[0]. */
#define CPM

/* I'd love to use enums, but not everyone has them.  Portability, y'know. */
/* Error codes understood by prerr(). */
#define BADLIST		1
#define NODELIM		2
#define NOFIELDS	3
#define USAGE		4
#define BADFILE		5
#define BACKERR		6
#define TOOLONG		7

/*
 * Default field delimiter.  No trailing semicolon: the macro has to be
 * usable inside an expression, not only as a complete initializer.
 */
#define	TAB	'\t'
#define BACKSP	0x8
#define	_MAXSZ	512			/* Size of every buffer/table below */
#define COMMAND "cut"

/* Values stored in fields[]. */
#define	IGNOREIT	0
#define CUTIT		1

char outbuf[_MAXSZ];			/* Processed output buffer */
char rawbuf[_MAXSZ];			/* Raw holding buffer for field mode */
#define	FLDFLAG	fields[0]		/* Used for EOL processing */
short int fields[_MAXSZ];		/* Max num of fields or line length */

char *cmdnam;				/* Name used in error messages */

short int cflag,fflag,sflag;		/* Set when -c, -f, -s are given */
char delim = TAB;			/* Field delimiter for -f mode */

/*
 * Old-style (parameterless) declarations of the routines defined further
 * down, so that none of them is called before it has been declared.
 */
int setflds();
int dofile();
int prerr();

/*
 * main - parse the option arguments, then cut every named file, or the
 * standard input when no file operand is given.
 *
 * Recognised options, each a single argument starting with '-':
 *	-cLIST	select character positions (handed to setflds())
 *	-fLIST	select fields (handed to setflds())
 *	-dC	use character C as the field delimiter
 *	-s	in field mode, do not echo lines in which no field was found
 * Only one of -c / -f may be given, and only once.
 *
 * Every error is reported through prerr(), which terminates the program.
 * Returns 0 once all input has been processed.
 */
int
main(argc,argv)
int argc;
char **argv;
{
	FILE *fileptr;
	int filecnt;

	cflag = fflag = sflag = 0;

#ifdef CPM
	cmdnam = COMMAND;
#else
	cmdnam = *argv;
#endif

	/* Skip invocation name (if the environment supplied one) */
	if(argc > 0)
	{
		argv++;
		argc--;
	}

	/* Most compilers initialize storage to zero; but don't count on it. */

	for(filecnt = 0;filecnt < _MAXSZ;filecnt++)
		fields[filecnt] = IGNOREIT;

	/*
	 * First, parse input options.  The argc test stops the scan at the
	 * end of the argument vector; without it the NULL terminator of
	 * argv would be dereferenced whenever no file operand follows the
	 * options.
	 */
	while((argc > 0) && (argv[0][0] == '-'))
	{
		switch(argv[0][1])
		{
			case 'c':
				/* Build the character position list */
				if(fflag || cflag)
					prerr(USAGE,(char *)NULL);

				cflag++;
				setflds(&argv[0][2]);
				break;

			case 'f':
				/* Build the field position list */
				if(fflag || cflag)
					prerr(USAGE,(char *)NULL);

				fflag++;
				setflds(&argv[0][2]);
				break;

			case 'd':
				/* New delimiter; must be attached to the option */
				delim = argv[0][2];
				if(delim == '\0')
					prerr(NODELIM,(char *)NULL);
				break;

			case 's':
				sflag++;
				break;

			default:
				prerr(USAGE,(char *)NULL);
		}
		argv++;
		argc--;
	}

	/* Finished all setup.  If no fields selected, tell them and exit. */
	if(!(cflag | fflag))
		prerr(BADLIST,(char *)NULL);

	if(!FLDFLAG)
		prerr(NOFIELDS,(char *)NULL);

	/*
	 * If no files specified, process stdin.  Otherwise,
	 * process on a file-by-file basis.
	 */
	if(argc == 0)
		dofile(stdin);
	else
	{
		for(filecnt = 0;filecnt < argc;filecnt++,argv++)
		{
			fileptr = fopen(argv[0],"r");
			if(fileptr == (FILE *)NULL)
				prerr(BADFILE,argv[0]);	/* the name, not the vector */

			dofile(fileptr);
			fclose(fileptr);
		}
	}

	return(0);
}

/*
 * setflds - translate a -c/-f list string into the fields[] table.
 *
 * fldstr is a comma separated list whose items have one of these forms:
 *
 *	n		the single position n
 *	a-b		positions a through b, where a<=b
 *	-n		positions 1 through n
 *	n-		position n through end of line / last field
 *
 * Every selected position p (1 <= p < _MAXSZ) gets fields[p] = CUTIT.
 * On return FLDFLAG (fields[0]) is 1 if at least one position was
 * selected and 0 otherwise.
 *
 * A malformed list (non-digit character, two '-' in one item, a range
 * that runs backwards, a number of _MAXSZ or more) is reported with
 * prerr(BADLIST), which terminates the program.
 *
 * Always returns 0.
 */
int
setflds(fldstr)
char *fldstr;
{
	int index;	/* First position of the range being built */
	int minflag;	/* Non-zero once a '-' was seen in the current item */
	int value;	/* Number currently being accumulated */
	int fldset;	/* Non-zero once any position has been selected */
	int eolstart;	/* Lowest start of an "n-" item; 0 when there is none */

	minflag = 0;
	value = 0;
	index = 1;
	fldset = 0;	/* Must start at zero: it decides FLDFLAG on return */
	eolstart = 0;
	FLDFLAG = 0;

	for(;;)
	{
		switch(*fldstr)
		{
			case '-':
				/* Starting a range; only one '-' per item */
				if(minflag)
					prerr(BADLIST,(char *)NULL);
				minflag++;
				fldstr++;

				/* "-n" has no explicit start and means "1-n" */
				index = value ? value : 1;
				value = 0;
				break;

			case ',':
			case '\0':
				/* Ending the string, or this field/column sublist */
				if(minflag)
				{
					/*
					 * A range: one of -n, a-n or n-.  In any
					 * case, index holds the start of the range.
					 */
					if(!value)
					{
						/*
						 * From index to EOL.  Keep the lowest
						 * start seen so far, so that a later
						 * "n-" cannot shrink an earlier one.
						 */
						if(!eolstart || (index < eolstart))
							eolstart = index;
						fldset++;
					}else
					{
						if(value < index)
							prerr(BADLIST,(char *)NULL);

						if(eolstart && (eolstart <= index))
						{
							/*
							 * The new range lies entirely
							 * inside the to-EOL range:
							 * nothing to record.
							 */
						}else if(eolstart && (eolstart < value))
						{
							/*
							 * The new range starts before
							 * the to-EOL range and runs
							 * into it: just move the EOL
							 * start back.
							 */
							eolstart = index;
							fldset++;
						}else
						{
							/* Simple range.  Fill it. */
							for(;index <= value;index++)
								fields[index] = CUTIT;
							fldset++;
						}
						value = 0;
					}
					/* Reset the range-in-progress flag. */
					minflag = 0;
				}else if(value)
				{
					/* A single position */
					fields[value] = CUTIT;
					value = 0;
					fldset++;
				}

				if(*fldstr == '\0')
				{
					/*
					 * Last bit of processing.  If there was
					 * a to-EOL item, fill the table from its
					 * start to the end.  FLDFLAG then tells
					 * the caller whether anything at all was
					 * selected.
					 */
					if(eolstart)
						for(index = eolstart;index < _MAXSZ;index++)
							fields[index] = CUTIT;

					FLDFLAG = fldset ? 1 : 0;
					return(0);
				}

				fldstr++;
				break;

			default:
				if((*fldstr < '0') || (*fldstr > '9'))
					prerr(BADLIST,(char *)NULL);

				value = 10 * value + (*fldstr - '0');

				/*
				 * Reject an out-of-range number as soon as it
				 * appears.  This keeps value a valid index into
				 * fields[] everywhere above and keeps a long
				 * digit string from overflowing it.
				 */
				if(value >= _MAXSZ)
					prerr(BADLIST,(char *)NULL);

				fldstr++;
				break;
		}
	}
}

/*
 * dofile - cut one open input stream according to the fields[] table.
 *
 * fno is read one character at a time with fgetc() until EOF.  For each
 * input line the selected characters (-c) or fields (-f) are gathered in
 * outbuf and written with puts().  In field mode a line on which no
 * selected field was reached is gathered in rawbuf instead and written
 * unchanged, unless -s was given.
 *
 * Every store into outbuf/rawbuf is bounds-checked and the field counter
 * is kept inside fields[]; a line that does not fit is reported as
 * TOOLONG.  Two adjacent backspaces in character mode are reported as
 * BACKERR.  Both go through prerr(), which terminates the program.
 *
 * Always returns 0.
 */
int
dofile(fno)
FILE *fno;
{
	int charcnt,poscnt,bflag,doneflag,fldfound;
	register int c;

	char *inbufptr, *rawbufptr;	/* Next free slot in outbuf / rawbuf */
	char *outlimit, *rawlimit;	/* Slot reserved for the final '\0' */

	outlimit = outbuf + (_MAXSZ - 1);
	rawlimit = rawbuf + (_MAXSZ - 1);

	do
	{
		inbufptr = outbuf;
		rawbufptr = rawbuf;
		charcnt = bflag = doneflag = fldfound = 0;
		poscnt = 1;

		do
		{
			c = fgetc(fno);
			if(c == EOF)
			{
				/* That's it for this file or stream */
				doneflag++;
				break;
			}

			if(cflag)
			{
				/*
				 * In character scan mode.  Look to see if
				 * it's an NROFF-type underlined character;
				 * if so, then don't count the backspace.
				 * Backspaces are copied without being
				 * counted, so the buffer limit is checked on
				 * every store rather than relying on charcnt.
				 */
				if(c == BACKSP)
				{
					if(bflag)
						prerr(BACKERR,(char *)NULL);

					bflag++;
					if(inbufptr >= outlimit)
						prerr(TOOLONG,(char *)NULL);
					*inbufptr++ = c;
				}else
				{
					/*
					 * Valid character.  If it's to be sent,
					 * stow it in the outbuffer.
					 */
					bflag = 0;

					if(++charcnt == (_MAXSZ - 1))
						prerr(TOOLONG,(char *)NULL);

					if(fields[charcnt] && (c != '\n'))
					{
						if(inbufptr >= outlimit)
							prerr(TOOLONG,(char *)NULL);
						*inbufptr++ = c;
					}
				}
			}else
			{
				/*
				 * Field processing.  In this case, charcnt
				 * does indicate processed characters on the
				 * current line, but that is all.  Notice that
				 * ALL characters are initially stowed in the
				 * raw buffer, until at least one field has
				 * been found.
				 */
				if(fields[poscnt])
				{
					/*
					 * Ok, working on a field.  It, and its
					 * terminating delimiter, go only into
					 * the processed buffer.
					 */
					fldfound = 1;
					if(c != '\n')
					{
						if(inbufptr >= outlimit)
							prerr(TOOLONG,(char *)NULL);
						*inbufptr++ = c;
					}
				}else if(!fldfound)
				{
					charcnt++;
					if(c != '\n')
					{
						if(rawbufptr >= rawlimit)
							prerr(TOOLONG,(char *)NULL);
						*rawbufptr++ = c;
					}
				}

				/*
				 * In any case, if a delimiter, bump the field
				 * indicator -- but never past the end of the
				 * fields[] table.
				 */
				if(c == delim)
				{
					if(++poscnt >= _MAXSZ)
						prerr(TOOLONG,(char *)NULL);
				}
			}
		}while(c != '\n');

		if((cflag && charcnt) || (fflag && fldfound))
		{
			/*
			 * No matter what mode, something was found. Print it.
			 * The trailing delimiter is suppressed; the pointer
			 * test keeps an empty result from looking at the byte
			 * in front of outbuf.
			 */
			if(fflag && (inbufptr > outbuf) && (*(inbufptr-1) == delim))
				--inbufptr;

			*inbufptr = '\0'; /* But null-terminate the line. */
			puts(outbuf);
		}else if((fflag && (!sflag)) && charcnt)
		{
			/*
			 * In this case, a line with some characters,
			 * no selected field, and no suppression.  Print it.
			 */
			*rawbufptr = '\0';
			puts(rawbuf);
		}

	}while(!doneflag);

	return(0);
}

/*
 * prerr - print the diagnostic for error code etype on stderr and
 * terminate the program with exit status 2.  This routine never returns.
 *
 * etype	one of BADLIST, USAGE, NOFIELDS, NODELIM, BADFILE, BACKERR,
 *		TOOLONG; any other value produces a generic message.
 * estring	only used by BADFILE, where it names the file that could
 *		not be opened; it is ignored (and may be NULL) otherwise.
 */
int
prerr(etype, estring)
int etype;
char *estring;
{
	switch(etype)
	{
		case BADLIST:
			fprintf(stderr,"%s : bad list for c/f option\n",cmdnam);
			break;

		case USAGE:
			fprintf(stderr,"Usage: %s [-s] [-d<char>] {-c<list> | -f<list>} file ...\n",cmdnam);
			break;

		case NOFIELDS:
			fprintf(stderr,"%s : no fields\n",cmdnam);
			break;

		case NODELIM:
			fprintf(stderr,"%s : no delimiter\n",cmdnam);
			break;

		case BADFILE:
			/* Never hand a NULL pointer to %s */
			fprintf(stderr,"Cannot open: %s : %s\n",cmdnam,
				(estring != (char *)NULL) ? estring : "(unknown)");
			break;

		case BACKERR:
			fprintf(stderr,"%s : cannot handle multiple adjacent backspaces\n",cmdnam);
			break;

		case TOOLONG:
			fprintf(stderr,"%s : line too long\n",cmdnam);
			break;

		default:
			/* An unknown code must not make the program die silently */
			fprintf(stderr,"%s : unknown error %d\n",cmdnam,etype);
			break;
	}
	exit(2);
}
